# ?register_google
# register_google(key = Sys.getenv("GOOGLE_MAPS_API_KEY"))  # never commit a literal API key; any key previously committed here should be revoked and rotated
#
# #uses Google API to obtain location data based on longitude and latitude....dont use unless necessary for new data
# d_respondents_only[ , c("housenumber", "street", "city", "county", "state", "zip", "country") := revgeo(as.numeric(LocationLongitude),as.numeric(LocationLatitude), provider = 'google', API = Sys.getenv("GOOGLE_MAPS_API_KEY"), output='frame')]
# #
# head(d_respondents_only)
# #
# #
# fwrite(d_respondents_only, file='datatable_clean_survey_responses_v2.dta')
# Load the cleaned survey responses produced by the commented-out pipeline
# above. NOTE(review): despite the .dta extension, the file was written with
# data.table::fwrite() (see L10), which writes delimited text, not a Stata
# file — fread() parses it as CSV. Consider renaming to .csv to avoid confusion.
d_respondents <- fread('datatable_clean_survey_responses_v2.dta')
# Rename Qualtrics' default duration column to a syntactic name.
setnames(d_respondents,
old = c('Duration (in seconds)'),
new = c('Survey_Duration'))
head(d_respondents)
## StartDate EndDate Status IPAddress Progress
## 1: 2020-11-09 20:46:55 2020-11-09 20:50:39 IP Address 174.88.123.135 100
## 2: 2020-11-09 20:47:33 2020-11-09 20:51:24 IP Address 172.93.166.91 100
## 3: 2020-11-09 20:47:23 2020-11-09 20:51:35 IP Address 68.36.215.223 100
## 4: 2020-11-09 20:46:32 2020-11-09 20:51:43 IP Address 99.75.53.174 100
## 5: 2020-11-09 20:47:44 2020-11-09 20:52:08 IP Address 24.35.119.43 100
## 6: 2020-11-09 20:46:47 2020-11-09 20:52:39 IP Address 98.212.214.93 100
## Survey_Duration Finished RecordedDate ResponseId
## 1: 223 TRUE 2020-11-09 20:50:39 R_VLuUQ4C82PP9HEd
## 2: 231 TRUE 2020-11-09 20:51:25 R_29cCZD1XK1dpmdY
## 3: 251 TRUE 2020-11-09 20:51:35 R_3lVN8EncJofnqnV
## 4: 310 TRUE 2020-11-09 20:51:43 R_50vJlfmoFTK1IeB
## 5: 264 TRUE 2020-11-09 20:52:08 R_1dFaKMSjyE3FJHg
## 6: 351 TRUE 2020-11-09 20:52:39 R_25vjj4Ik4Dkm2UN
## RecipientLastName RecipientFirstName RecipientEmail ExternalReference
## 1: NA NA NA NA
## 2: NA NA NA NA
## 3: NA NA NA NA
## 4: NA NA NA NA
## 5: NA NA NA NA
## 6: NA NA NA NA
## LocationLatitude LocationLongitude DistributionChannel UserLanguage
## 1: 43.68 -79.29 anonymous EN
## 2: 33.75 -84.39 anonymous EN
## 3: 42.66 -83.12 anonymous EN
## 4: 42.00 -88.14 anonymous EN
## 5: 40.08 -82.97 anonymous EN
## 6: 42.01 -88.10 anonymous EN
## Amazon_Turk_ID Gender Q82_3_TEXT Age_Range Education_Level
## 1: A4D99Y82KOLC8 Male NA 35-44 Trade school
## 2: A1AC47WJLNW4G7 Male NA 25-34 Master's degree and above
## 3: A77K8W55MJEKX Female NA 45-54 Bachelor's degree
## 4: A17TKHT8FEVH0R Male NA 25-34 Associate's degree
## 5: A1A0WM0OJMOF7Z Female NA 25-34 Trade school
## 6: A2VO8C41JJIQY9 Male NA 25-34 Master's degree and above
## Q1 Q2 Q3 Q4 Q5 Q6 Q7
## 1: Pneumonia Normal Normal Pneumonia Normal Pneumonia Pneumonia
## 2: Pneumonia Normal Normal Pneumonia Normal Normal Normal
## 3: Pneumonia Pneumonia Pneumonia Pneumonia Pneumonia Pneumonia Normal
## 4: Normal Pneumonia Pneumonia Normal Pneumonia Pneumonia Pneumonia
## 5: Normal Pneumonia Pneumonia Normal Pneumonia Pneumonia Pneumonia
## 6: Pneumonia Normal Normal Pneumonia Pneumonia Pneumonia Normal
## Q8 Q9 Q10
## 1: Normal Pneumonia Pneumonia
## 2: Normal Normal Pneumonia
## 3: Normal Pneumonia Pneumonia
## 4: Normal Pneumonia Normal
## 5: Normal Pneumonia Normal
## 6: Pneumonia Pneumonia Pneumonia
## Control_Q1
## 1:
## 2:
## 3:
## 4:
## 5: The sentiment that this place brings, and how much hospitality means to them. How open, diverse and welcoming they are as well that bring people in. It's very heartfelt and warming.
## 6:
## Q70_First Click Q70_Last Click Q70_Page Submit Q70_Click Count Q11
## 1: NA NA NA NA Normal
## 2: NA NA NA NA Normal
## 3: NA NA NA NA Pneumonia
## 4: NA NA NA NA Pneumonia
## 5: 31.08 31.08 77.39 1 Normal
## 6: NA NA NA NA Normal
## Q12 Q13 Q14 Q15 Q16 Q17 Q18
## 1: Normal Pnuemonia Pneumonia Pneumonia Normal Normal Pneumonia
## 2: Normal Pnuemonia Normal Normal Pneumonia Normal Pneumonia
## 3: Normal Pnuemonia Pneumonia Pneumonia Pneumonia Pneumonia Pneumonia
## 4: Normal Pnuemonia Normal Normal Normal Pneumonia Normal
## 5: Pneumonia Normal Pneumonia Pneumonia Normal Pneumonia Pneumonia
## 6: Normal Pnuemonia Pneumonia Normal Normal Pneumonia Pneumonia
## Q19 Q20
## 1: Pneumonia Normal
## 2: Normal Pneumonia
## 3: Pneumonia Pneumonia
## 4: Pneumonia Pneumonia
## 5: Normal Pneumonia
## 6: Pneumonia Pneumonia
## Control_Q2
## 1:
## 2:
## 3:
## 4:
## 5: It brings awareness to a serious issue that can harm people. It's an advertisement to bring people together to be more knowledgeable about it.
## 6:
## Q90_First Click Q90_Last Click Q90_Page Submit Q90_Click Count Q21
## 1: NA NA NA NA Pneumonia
## 2: NA NA NA NA Normal
## 3: NA NA NA NA Pneumonia
## 4: NA NA NA NA Normal
## 5: 10.13 68.74 70.97 2 Pneumonia
## 6: NA NA NA NA Normal
## Q22 Q23 Q24 Q25 Q26 Q27 Q28
## 1: Pneumonia Normal Pneumonia Normal Pneumonia Pneumonia Normal
## 2: Pneumonia Pneumonia Normal Normal Normal Pneumonia Normal
## 3: Pneumonia Normal Pneumonia Pneumonia Pneumonia Pneumonia Pneumonia
## 4: Pneumonia Pneumonia Pneumonia Pneumonia Pneumonia Normal Pneumonia
## 5: Pneumonia Pneumonia Normal Pneumonia Normal Normal Pneumonia
## 6: Pneumonia Pneumonia Normal Pneumonia Normal Normal Pneumonia
## Q29 Q30 Q36
## 1: Normal Pneumonia
## 2: Pneumonia Normal
## 3: Pneumonia Pneumonia
## 4: Pneumonia Normal
## 5: Pneumonia Pneumonia
## 6: Normal Normal
## Self_Reflect_Q1
## 1:
## 2:
## 3:
## 4: I think I did pretty good. I was not expecting to do as well as I did.
## 5:
## 6:
## Q61_First Click Q61_Last Click Q61_Page Submit Q61_Click Count Q41
## 1: NA NA NA NA
## 2: NA NA NA NA
## 3: NA NA NA NA
## 4: 32.36 43.79 111.6 3
## 5: NA NA NA NA
## 6: NA NA NA NA
## Self_Reflect_Q2
## 1:
## 2:
## 3:
## 4: I think I did incredible. I only got 2 wrong. This was harder than the previous page.
## 5:
## 6:
## Q62_First Click Q62_Last Click Q62_Page Submit Q62_Click Count
## 1: NA NA NA NA
## 2: NA NA NA NA
## 3: NA NA NA NA
## 4: 10.06 10.06 100.3 1
## 5: NA NA NA NA
## 6: NA NA NA NA
## Q38
## 1:
## 2:
## 3:
## 4:
## 5:
## 6: Image 2Correct diagnosis: Normal\nYou chose: ${q://QID5/ChoiceGroup/SelectedChoices}\n
## Q63_First Click Q63_Last Click Q63_Page Submit Q63_Click Count Q43
## 1: NA NA NA NA
## 2: NA NA NA NA
## 3: NA NA NA NA
## 4: NA NA NA NA
## 5: NA NA NA NA
## 6: 1.205 100.7 108 16
## Q64_First Click Q64_Last Click Q64_Page Submit Q64_Click Count Q45
## 1: NA NA NA NA NA
## 2: NA NA NA NA NA
## 3: NA NA NA NA NA
## 4: NA NA NA NA NA
## 5: NA NA NA NA NA
## 6: 76.03 101.2 102.4 3 NA
## Q65_First Click Q65_Last Click Q65_Page Submit Q65_Click Count Q47
## 1: NA NA NA NA NA
## 2: 14.69 16.23 47.68 2 NA
## 3: NA NA NA NA NA
## 4: NA NA NA NA NA
## 5: NA NA NA NA NA
## 6: NA NA NA NA NA
## Q66_First Click Q66_Last Click Q66_Page Submit Q66_Click Count Q46
## 1: NA NA NA NA NA
## 2: 5.75 18.28 46.59 2 NA
## 3: NA NA NA NA NA
## 4: NA NA NA NA NA
## 5: NA NA NA NA NA
## 6: NA NA NA NA NA
## Q67_First Click Q67_Last Click Q67_Page Submit Q67_Click Count Q48
## 1: 0.855 57.41 58.20 29 NA
## 2: NA NA NA NA NA
## 3: 16.263 16.26 50.06 1 NA
## 4: NA NA NA NA NA
## 5: NA NA NA NA NA
## 6: NA NA NA NA NA
## Q68_First Click Q68_Last Click Q68_Page Submit Q68_Click Count Total_Score
## 1: 0.530 60.605 61.22 15 16
## 2: NA NA NA NA 12
## 3: 9.427 9.427 49.63 1 15
## 4: NA NA NA NA 21
## 5: NA NA NA NA 14
## 6: NA NA NA NA 18
## Random ID Assignment Q1_Score Q2_Score Q3_Score Q4_Score Q5_Score Q6_Score
## 1: 14409 FL_41 0 1 0 1 1 1
## 2: 58508 FL_16 0 1 0 1 1 0
## 3: 96075 FL_41 0 0 1 1 0 1
## 4: 74553 FL_14 1 0 1 0 0 1
## 5: 35543 FL_17 1 0 1 0 0 1
## 6: 84565 FL_15 0 1 0 1 0 1
## Q7_Score Q8_Score Q9_Score Q10_Score Q11_Score Q12_Score Q13_Score Q14_Score
## 1: 1 1 1 0 0 1 0 1
## 2: 0 1 0 0 0 1 0 0
## 3: 0 1 1 0 1 1 0 1
## 4: 1 1 1 1 1 1 0 0
## 5: 1 1 1 1 0 0 0 1
## 6: 0 0 1 0 0 1 0 1
## Q15_Score Q16_Score Q17_Score Q18_Score Q19_Score Q20_Score Q21_Score
## 1: 0 1 0 0 1 1 0
## 2: 1 0 0 0 0 0 1
## 3: 0 0 1 0 1 0 0
## 4: 1 1 1 1 1 0 1
## 5: 0 1 1 0 0 0 0
## 6: 1 1 1 0 1 0 1
## Q22_Score Q23_Score Q24_Score Q25_Score Q26_Score Q27_Score Q28_Score
## 1: 0 0 0 0 1 1 0
## 2: 0 1 1 0 0 1 0
## 3: 0 0 0 1 1 1 1
## 4: 0 1 0 1 1 0 1
## 5: 0 1 1 1 0 0 1
## 6: 0 1 1 1 0 0 1
## Q29_Score Q30_Score Assignment_Group TaskPhase1_Score TaskPhase2_Score
## 1: 1 0 Negative Images 0.7 0.5
## 2: 0 1 Positive Images 0.4 0.2
## 3: 0 0 Negative Images 0.5 0.5
## 4: 0 1 Self-Reflect 0.7 0.7
## 5: 0 0 Control 0.7 0.3
## 6: 1 1 Medical Feedback 0.4 0.6
## TaskPhase3_Score housenumber street city
## 1: 0.3 351 Glen Manor Drive Toronto
## 2: 0.5 262 Capitol Avenue Southeast Atlanta
## 3: 0.4 440 Bedlington Drive Rochester Hills
## 4: 0.6 1200 Sycamore Avenue Hanover Park
## 5: 0.4 1913 Brookfield Road Columbus
## 6: 0.7 617 Boxwood Drive Schaumburg
## county state zip country
## 1: Canada Ontario M4E 2X8 Canada
## 2: United States Georgia 30312 United States
## 3: United States Michigan 48307 United States
## 4: United States Illinois 60133 United States
## 5: United States Ohio 43229 United States
## 6: United States Illinois 60193 United States
nrow(d_respondents)
## [1] 350
#remove duplicate Amazon Turk IDs
nrow(d_respondents) #350 rows
## [1] 350
# Keep only the first response per worker. unique(DT, by =) retains the
# first occurrence in file order, exactly like the !duplicated() subset.
d_respondents <- unique(d_respondents, by = "Amazon_Turk_ID") #350 rows
create_heatmap <- function(var1, var2) {
  # Render the cross-tabulation of two categorical vectors as a tiled
  # heatmap with the raw frequency printed on each cell. Returns a ggplot
  # object so callers can layer on labels, titles, and captions.
  freq_df <- as.data.frame(table(var1, var2))
  ggplot(freq_df, aes(x = var1, y = var2)) +
    geom_tile(aes(fill = Freq, color = Freq), show.legend = FALSE, alpha = .8) +
    geom_text(aes(label = Freq)) +
    scale_fill_continuous(high = "darkslategray4", low = "powderblue")
}
#some EDA
#d_respondents[ , table(state, country)]
# Respondent counts by (state, country), most frequent first; the full
# cross-tabulation produces many zero cells, so drop them.
table(d_respondents$state, d_respondents$country) %>%
as.data.frame() %>%
arrange(desc(Freq)) %>%
filter(Freq>0)
## Var1 Var2 Freq
## 1 Tamil Nadu India 107
## 2 California United States 72
## 3 New York United States 22
## 4 Kansas United States 21
## 5 Texas United States 15
## 6 Florida United States 9
## 7 Massachusetts United States 7
## 8 Missouri United States 6
## 9 Connecticut United States 5
## 10 Georgia United States 5
## 11 Indiana United States 5
## 12 Michigan United States 5
## 13 New Jersey United States 5
## 14 Illinois United States 4
## 15 Virginia United States 4
## 16 Kerala India 3
## 17 Maharashtra India 3
## 18 Colorado United States 3
## 19 Kentucky United States 3
## 20 Maryland United States 3
## 21 North Carolina United States 3
## 22 Oregon United States 3
## 23 Ontario Canada 2
## 24 Alabama United States 2
## 25 Idaho United States 2
## 26 Minnesota United States 2
## 27 Mississippi United States 2
## 28 Nevada United States 2
## 29 Ohio United States 2
## 30 Pennsylvania United States 2
## 31 Washington United States 2
## 32 Qarku i Tiranës Albania 1
## 33 Khulna Division Bangladesh 1
## 34 Bahia Brazil 1
## 35 Atacama Chile 1
## 36 Provence-Alpes-Côte d'Azur France 1
## 37 Departamento de Olancho Honduras 1
## 38 Andhra Pradesh India 1
## 39 Karnataka India 1
## 40 Sardegna Italy 1
## 41 England United Kingdom 1
## 42 Arizona United States 1
## 43 Iowa United States 1
## 44 Louisiana United States 1
## 45 Maine United States 1
## 46 Nebraska United States 1
## 47 Oklahoma United States 1
## 48 South Carolina United States 1
## 49 South Dakota United States 1
## 50 Tennessee United States 1
# Respondent counts by country, most frequent first.
table(d_respondents$country) %>%
as.data.frame() %>%
arrange(desc(Freq))
## Var1 Freq
## 1 United States 225
## 2 India 115
## 3 Canada 2
## 4 Albania 1
## 5 Bangladesh 1
## 6 Brazil 1
## 7 Chile 1
## 8 France 1
## 9 Honduras 1
## 10 Italy 1
## 11 United Kingdom 1
# Distribution of Total_Score, highest score first. NOTE(review): Var1 is a
# factor here, so desc(Var1) sorts by factor level order — which matches
# numeric order in the output below, but would not for arbitrary levels.
table(d_respondents$Total_Score) %>%
as.data.frame() %>%
arrange(desc(Var1))
## Var1 Freq
## 1 27 1
## 2 26 1
## 3 25 4
## 4 24 12
## 5 23 15
## 6 22 16
## 7 21 22
## 8 20 27
## 9 19 21
## 10 18 31
## 11 17 40
## 12 16 40
## 13 15 30
## 14 14 30
## 15 13 19
## 16 12 18
## 17 11 13
## 18 10 6
## 19 9 3
## 20 8 1
# Per-group summary: mean total score, group size, and mean survey duration.
d_respondents %>%
group_by(Assignment_Group) %>%
summarise(mean = mean(Total_Score),
count = n(),
time_duration = mean(Survey_Duration))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 5 x 4
## Assignment_Group mean count time_duration
## <chr> <dbl> <int> <dbl>
## 1 Control 16.7 69 638.
## 2 Medical Feedback 17.8 70 656.
## 3 Negative Images 16.5 72 783
## 4 Positive Images 17.3 70 505.
## 5 Self-Reflect 17.2 69 612.
#d_respondents[ , .(count = .N, avg = mean(Total_Score)), by=Assignment_Group] #same thing
# hist() in j draws the base-graphics histogram as a side effect and returns
# the histogram object (breaks/counts/density), which prints below.
d_respondents[ , hist(Total_Score)]
## $breaks
## [1] 8 10 12 14 16 18 20 22 24 26 28
##
## $counts
## [1] 10 31 49 70 71 48 38 27 5 1
##
## $density
## [1] 0.014286 0.044286 0.070000 0.100000 0.101429 0.068571 0.054286 0.038571
## [9] 0.007143 0.001429
##
## $mids
## [1] 9 11 13 15 17 19 21 23 25 27
##
## $xname
## [1] "Total_Score"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
# Five-number summary (plus mean) of Total_Score within each assignment group.
tapply(d_respondents$Total_Score, d_respondents$Assignment_Group, summary)
## $Control
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8.0 14.0 16.0 16.7 19.0 24.0
##
## $`Medical Feedback`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10.0 16.0 17.5 17.8 20.0 24.0
##
## $`Negative Images`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 9.0 13.0 16.0 16.5 19.2 25.0
##
## $`Positive Images`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 9.0 15.0 17.0 17.3 20.0 27.0
##
## $`Self-Reflect`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 9.0 14.0 17.0 17.2 20.0 25.0
# Standard deviation of Total_Score within each assignment group.
tapply(d_respondents$Total_Score, d_respondents$Assignment_Group, sd)
## Control Medical Feedback Negative Images Positive Images
## 3.659 3.279 3.996 3.817
## Self-Reflect
## 3.882
# Overall (pooled) standard deviation of Total_Score.
d_respondents[ , sd(Total_Score)]
## [1] 3.743
library(ggmap)
?register_google
# SECURITY: never hard-code an API key in source control. Read it from an
# environment variable instead (set GOOGLE_MAPS_API_KEY in ~/.Renviron);
# the literal key previously committed here should be revoked and rotated.
register_google(key = Sys.getenv("GOOGLE_MAPS_API_KEY"))
# ggmap_show_api_key()
# Fetch a Google terrain basemap centered on the US (network call).
us_map <- get_map(location = 'united states', zoom = 4, maptype = "terrain",
                  source = 'google', color = 'color')
## Source : https://maps.googleapis.com/maps/api/staticmap?center=united%20states&zoom=4&size=640x640&scale=2&maptype=terrain&language=en-EN&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=united+states&key=xxx
# Plot respondent locations over the US basemap. Fixes two issues flagged by
# ggplot2 itself: the deprecated `show_guide` argument (see the warning the
# original emitted) is replaced by `show.legend`, and the data-driven x/y
# aesthetics are mapped inside aes() rather than passed as raw vectors.
ggmap(us_map) +
  geom_point(aes(x = LocationLongitude, y = LocationLatitude),
             data = d_respondents, show.legend = TRUE, alpha = 0.5, na.rm = TRUE) +
  scale_color_gradient(low = "beige", high = "red")
# Fetch a Google terrain basemap centered on India (network call).
india_map <- get_map(location = 'india', zoom = 5, maptype = "terrain",
                     source = 'google', color = 'color')
## Source : https://maps.googleapis.com/maps/api/staticmap?center=india&zoom=5&size=640x640&scale=2&maptype=terrain&language=en-EN&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=india&key=xxx
# Plot respondent locations over India; deprecated `show_guide` replaced by
# `show.legend`, and x/y mapped inside aes().
ggmap(india_map) +
  geom_point(aes(x = LocationLongitude, y = LocationLatitude),
             data = d_respondents, show.legend = TRUE, alpha = 0.5, na.rm = TRUE) +
  scale_color_gradient(low = "beige", high = "red")
#http://www.sthda.com/english/wiki/chi-square-goodness-of-fit-test-in-r
# Chi-squared goodness-of-fit test: are respondents allocated uniformly
# (1/5 each) across the five assignment groups?
respondent_counts <- d_respondents[ , .(.N), keyby=Assignment_Group]
respondent_counts_chisq_test <- chisq.test(respondent_counts[,2], p=c(1/5, 1/5, 1/5, 1/5, 1/5))
# Bar chart of group sizes, with the test result reported in the caption.
respondent_counts %>%
ggplot(aes(x = Assignment_Group,y=N)) +
geom_bar(stat = 'identity') +
coord_flip() +
xlab('Assignment Group') +
ylab('Number of Observations') +
labs(title='Randomization check among assignment groups',
caption = paste0('Assuming equal distribution among assignment groups, a chi-squared goodness of fit test with ',
round(respondent_counts_chisq_test$parameter,4),' degrees of \nfreedom ', 'yields p=',
round(respondent_counts_chisq_test$p.value,4),
', suggesting that the observed proportions are not significantly different from the \nexpected proportions at a significance level of 0.05.')) +
theme(plot.caption = element_text(hjust = 0))
#p-value = 0.9991, which is greater than significance level of 0.05.
#We can conclude that the observed proportions are not significantly different from the expected proportions
#let's consider adding age bins and education bins
# Recode ordered categorical responses into integer bins via lookup vectors;
# match() returns NA for unmatched values, exactly like the original chain of
# one-condition-per-line := assignments (rows with other values stay NA).
# as.numeric() preserves the double storage type the literal 1..6 produced.
age_levels <- c("18-24", "25-34", "35-44", "45-54", "55-64", "Above 65")
d_respondents[ , age_bin := as.numeric(match(Age_Range, age_levels))]
edu_levels <- c("Associate's degree", "Bachelor's degree", "High school",
                "Master's degree and above", "Some high school", "Trade school")
d_respondents[ , edu_bin := as.numeric(match(Education_Level, edu_levels))]
assign_levels <- c("Control", "Medical Feedback", "Negative Images",
                   "Positive Images", "Self-Reflect")
d_respondents[ , assign_bin := as.numeric(match(Assignment_Group, assign_levels))]
# Indicator variables: US-based respondent and male respondent.
d_respondents[ , US_Dummy := ifelse(country == "United States", 1, 0)]
d_respondents[ , Male_Dummy := ifelse(Gender == "Male", 1, 0)]
#add treatment dummy (1 = any treatment group, 0 = control)
d_respondents[ , Treatment_Dummy := ifelse(Assignment_Group != "Control", 1, 0)]
#head(d_respondents)
# Group sizes and mean score per task phase, by assignment group.
# TaskPhase1 is pre-treatment, so similar means across groups support
# successful randomization.
d_respondents %>%
group_by(Assignment_Group) %>%
summarise(num_respondents = n(),
pre_treatment_avg = mean(TaskPhase1_Score),
taskphase2_avg = mean(TaskPhase2_Score),
taskphase3_avg = mean(TaskPhase3_Score))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 5 x 5
## Assignment_Group num_respondents pre_treatment_a… taskphase2_avg
## <chr> <int> <dbl> <dbl>
## 1 Control 69 0.607 0.461
## 2 Medical Feedback 70 0.634 0.523
## 3 Negative Images 72 0.578 0.494
## 4 Positive Images 70 0.614 0.514
## 5 Self-Reflect 69 0.599 0.526
## # … with 1 more variable: taskphase3_avg <dbl>
# Covariate balance by assignment group: mean age bin, mean education bin,
# share male, and share US-based.
d_respondents %>%
group_by(Assignment_Group) %>%
summarise(num_respondents = n(),
avg_age_bin = mean(age_bin),
avg_edu_bin = mean(edu_bin),
male = mean(Male_Dummy),
US = mean(US_Dummy))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 5 x 6
## Assignment_Group num_respondents avg_age_bin avg_edu_bin male US
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Control 69 2.68 2.61 0.609 0.652
## 2 Medical Feedback 70 2.63 2.47 0.586 0.529
## 3 Negative Images 72 2.62 2.58 0.583 0.625
## 4 Positive Images 70 2.86 2.6 0.586 0.714
## 5 Self-Reflect 69 2.83 2.42 0.594 0.696
# Group sizes only. The original had a trailing comma inside summarise(),
# which dplyr happens to tolerate but base R treats as a missing argument;
# removed for robustness.
d_respondents %>%
  group_by(Assignment_Group) %>%
  summarise(num_respondents = n())
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 5 x 2
## Assignment_Group num_respondents
## <chr> <int>
## 1 Control 69
## 2 Medical Feedback 70
## 3 Negative Images 72
## 4 Positive Images 70
## 5 Self-Reflect 69
#Density distribution of Survey Duration
# NOTE: xlim(0, 1500) drops observations outside the window before the
# density is computed (the "Removed 6 rows" warning below comes from this).
ggplot(d_respondents, aes(x=Survey_Duration, colour=as.factor(Assignment_Group), fill = as.factor(Assignment_Group))) +
geom_density(alpha = 0.3) +
xlim(0, 1500) +
xlab("Completion Time (seconds)") +
ggtitle("Survey Duration Distribution")
## Warning: Removed 6 rows containing non-finite values (stat_density).
#Comparing pretreatment values
# Density of pre-treatment (TaskPhase1) scores: first pooled treatment vs
# control, then split by the five assignment groups.
ggplot(d_respondents, aes(x=TaskPhase1_Score, fill = as.factor(Treatment_Dummy), colour=as.factor(Treatment_Dummy))) + geom_density(alpha = 0.3)
ggplot(d_respondents, aes(x=TaskPhase1_Score, fill = as.factor(Assignment_Group), colour=as.factor(Assignment_Group))) + geom_density(alpha = 0.3) + ggtitle("PreTreatment Scores by Group") + theme(plot.title = element_text(hjust = 0.5))
#Comparing taskphase2 values
ggplot(d_respondents, aes(x=TaskPhase2_Score, fill = as.factor(Assignment_Group), colour=as.factor(Assignment_Group))) + geom_density(alpha = 0.2) + ggtitle("TaskPhase2 Scores by Group") + theme(plot.title = element_text(hjust = 0.5))
#Comparing taskphase3 values
ggplot(d_respondents, aes(x=TaskPhase3_Score, fill = as.factor(Assignment_Group), colour=as.factor(Assignment_Group))) + geom_density(alpha = 0.2) + ggtitle("TaskPhase3 Scores by Group") + theme(plot.title = element_text(hjust = 0.5))
#boxplots for pretreatment values
# Boxplots of pre-treatment scores per assignment group, with a dashed line
# marking each group's mean. stat_summary(fun.y=) is deprecated in
# ggplot2 >= 3.3 (the original emitted that warning); use fun= instead.
ggplot(d_respondents, aes(x = Assignment_Group, y = TaskPhase1_Score, colour = as.factor(Assignment_Group))) +
  geom_boxplot() +
  stat_summary(fun = mean, geom = "errorbar", aes(ymax = ..y.., ymin = ..y..),
               width = .75, linetype = "dashed") +
  ggtitle("PreTreatment Values") +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  theme(plot.title = element_text(hjust = 0.5))
#boxplots for TaskPhase2 values
# Boxplots of TaskPhase2 scores, control (0) vs pooled treatment (1), with a
# dashed mean line. Deprecated fun.y= replaced by fun=.
ggplot(d_respondents, aes(x = Treatment_Dummy, y = TaskPhase2_Score, colour = as.factor(Treatment_Dummy))) +
  geom_boxplot() +
  stat_summary(fun = mean, geom = "errorbar", aes(ymax = ..y.., ymin = ..y..),
               width = .75, linetype = "dashed") +
  ggtitle("TaskPhase2 Scores") +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  theme(plot.title = element_text(hjust = 0.5))
#boxplots for TaskPhase3 values
# Boxplots of TaskPhase3 scores, control (0) vs pooled treatment (1), with a
# dashed mean line. Deprecated fun.y= replaced by fun=.
ggplot(d_respondents, aes(x = Treatment_Dummy, y = TaskPhase3_Score, colour = as.factor(Treatment_Dummy))) +
  geom_boxplot() +
  stat_summary(fun = mean, geom = "errorbar", aes(ymax = ..y.., ymin = ..y..),
               width = .75, linetype = "dashed") +
  ggtitle("TaskPhase3 Scores") +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  theme(plot.title = element_text(hjust = 0.5))
# TODO format figures and captions
#check balance between gender
# Chi-squared test of independence between assignment group and gender.
# Renamed from `gender_chiqq` (typo) for consistency with the sibling
# age_chisq / edu_chisq / us_chisq variables below.
gender_chisq <- chisq.test(d_respondents[ , table(Assignment_Group, Gender)])
# Heatmap of the contingency table, with the test result in the caption.
create_heatmap(var1 = d_respondents$Assignment_Group,var2 = d_respondents$Gender) +
xlab('Assignment Group') +
ylab('Gender') +
labs(title = 'Contingency table between gender and assignment group',
caption = paste0('Assuming gender distributions are the same among assignment groups, a chi-squared test for independence with ',
round(gender_chisq$parameter,4),' \ndegrees of freedom ', 'yields p=',
round(gender_chisq$p.value,4),
', suggesting that there is no relationship between gender and assignment groups at a \nsignificance level of 0.05.')) +
theme(plot.caption = element_text(hjust = 0))
# TODO format figures and captions
#check balance between age-range
# expected frequency count for each cell of the contingency table should be at least 5. Since this is not the case, we set simulate.p.value = TRUE to use Monte Carlo simulation
# https://stats.stackexchange.com/questions/81483/warning-in-r-chi-squared-approximation-may-be-incorrect
age_chisq <- chisq.test(d_respondents[ , table(Assignment_Group, Age_Range)],simulate.p.value = TRUE)
# Heatmap of the age-range contingency table, test result in the caption.
create_heatmap(var1 = d_respondents$Assignment_Group,var2 = d_respondents$Age_Range) +
xlab('Assignment Group') +
ylab('Age') +
labs(title = 'Contingency table between age range and assignment group',
caption = paste0('Assuming age distributions are the same among assignment groups, a chi-squared test for independence with Monte \nCarlo simulation yields p=',
round(age_chisq$p.value,4),
', suggesting that there is no relationship between age and assignment groups at a \nsignificance level of 0.05.')) +
theme(plot.caption = element_text(hjust = 0))
# TODO format figures and captions
#check balance between education levels
# Same Monte Carlo approach as age: small expected cell counts.
edu_chisq <- chisq.test(d_respondents[ , table(Assignment_Group, Education_Level)],simulate.p.value = TRUE)
create_heatmap(var1 = d_respondents$Assignment_Group,var2 = d_respondents$Education_Level) +
xlab('Assignment Group') +
ylab('Education Level') +
labs(title = 'Contingency table between education and assignment group',
caption = paste0('Assuming education distributions are the same among assignment groups, a chi-squared test for \nindependence with Monte Carlo simulation yields p=',
round(edu_chisq$p.value,4),
', suggesting that there is no relationship \nbetween education and assignment groups at a significance level of 0.05.')) +
theme(plot.caption = element_text(hjust = 0))
# TODO format figures and captions
# out.width = "80%"
# check balance between US and non-US respondents
# 2x5 table has large enough expected counts for the asymptotic test.
us_chisq <- chisq.test(d_respondents[ , table(Assignment_Group, US_Dummy)])
create_heatmap(var1 = d_respondents$Assignment_Group,var2 = d_respondents$US_Dummy) +
xlab('Assignment Group') +
ylab('Country') +
scale_y_discrete(breaks=c("0", "1"),
labels=c("Non-US", "United States")) +
labs(title = 'Contingency table between country and assignment group',
caption = paste0('Assuming country distributions are the same among assignment groups, a chi-squared test for independence with \n',
round(us_chisq$parameter,4),' degrees of freedom ', 'yields p=',
round(us_chisq$p.value,4),
', suggesting that there is no relationship between country and assignment \ngroups at a significance level of 0.05.')) +
theme(plot.caption = element_text(hjust = 0))
# ATE of treatment on Total Score
# Difference-in-means estimate (any treatment vs control). The 0.51-point
# effect is small relative to the 3.74 pooled SD shown below.
d_respondents[ Treatment_Dummy == 1, mean(Total_Score)] - d_respondents[ Treatment_Dummy == 0, mean(Total_Score)]
## [1] 0.5143
sd(d_respondents$Total_Score)
## [1] 3.743
# ATE of treatment on TaskPhase2 Score
# Same estimator on the post-treatment TaskPhase2 proportion-correct scale.
d_respondents[ Treatment_Dummy == 1, mean(TaskPhase2_Score)] - d_respondents[ Treatment_Dummy == 0, mean(TaskPhase2_Score)]
## [1] 0.05337
sd(d_respondents$TaskPhase2_Score)
## [1] 0.1645
#trying 2SLS...but dont think it applies here
# d_respondents[ , lm(Total_Score ~ Education_Level)]
# d_respondents[ , ivreg(Total_Score ~ Education_Level | Assignment_Group)]
# Power analysis: sample size per group needed to detect the observed
# TaskPhase2 effect (delta = .05, sd = .16, matching the estimates above)
# at 80% power. Result (~162/group) exceeds the ~70/group available.
power.t.test( delta = .05, sd=.16, sig.level = 0.05, power=0.8)
##
## Two-sample t test power calculation
##
## n = 161.7
## delta = 0.05
## sd = 0.16
## sig.level = 0.05
## power = 0.8
## alternative = two.sided
##
## NOTE: n is number in *each* group
get_robust_se <- function(model){
  # Heteroskedasticity-consistent (HC1, Stata-style) standard errors for a
  # fitted model, in the vector form stargazer's `se=` argument expects.
  # The local was renamed from `vcov` to avoid shadowing stats::vcov().
  vcov_hc1 <- vcovHC(model, type = "HC1")
  return(sqrt(diag(vcov_hc1)))
}
# does any treatment have an effect on task phase 2 score?
# Model (a): bivariate regression on the pooled treatment dummy.
mod_task2_a <- d_respondents[, lm(TaskPhase2_Score ~ Treatment_Dummy)]
# Model (b): adds pre-treatment score plus gender, education, and age
# fixed effects as covariates.
mod_task2_b <- d_respondents[, lm(TaskPhase2_Score ~ Treatment_Dummy +
TaskPhase1_Score +
as.factor(Gender) +
as.factor(Education_Level) +
as.factor(Age_Range))]
# Side-by-side table with HC1 robust SEs; education/age coefficients are
# omitted from the display and summarized by the add.lines rows.
stargazer(mod_task2_a,
mod_task2_b,
se = list(get_robust_se(mod_task2_a),get_robust_se(mod_task2_b)),
omit = c("Education_Level","Age_Range"),
add.lines = list(c('Education Fixed Effects', 'No','Yes'),
c('Age Fixed Effects','No','Yes')),
type='text')
##
## =====================================================================
## Dependent variable:
## ---------------------------------------------
## TaskPhase2_Score
## (1) (2)
## ---------------------------------------------------------------------
## Treatment_Dummy 0.053** 0.051**
## (0.022) (0.022)
##
## TaskPhase1_Score 0.240***
## (0.047)
##
## as.factor(Gender)Male -0.010
## (0.017)
##
## Constant 0.461*** 0.281***
## (0.019) (0.072)
##
## ---------------------------------------------------------------------
## Education Fixed Effects No Yes
## Age Fixed Effects No Yes
## Observations 350 350
## R2 0.017 0.117
## Adjusted R2 0.014 0.083
## Residual Std. Error 0.163 (df = 348) 0.158 (df = 336)
## F Statistic 5.911** (df = 1; 348) 3.433*** (df = 13; 336)
## =====================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
#add an F test to compare
# Nested-model F test: do the covariates in model (b) jointly improve fit
# over the bivariate model (a)?
anova(mod_task2_a, mod_task2_b, test='F')
## Analysis of Variance Table
##
## Model 1: TaskPhase2_Score ~ Treatment_Dummy
## Model 2: TaskPhase2_Score ~ Treatment_Dummy + TaskPhase1_Score + as.factor(Gender) +
## as.factor(Education_Level) + as.factor(Age_Range)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 348 9.29
## 2 336 8.34 12 0.95 3.19 0.00024 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Does the specific treatment arm (relative to the omitted control group)
# move the phase-2 score?
mod_task2_c <- d_respondents[, lm(TaskPhase2_Score ~ as.factor(Assignment_Group))]
mod_task2_d <- d_respondents[, lm(
  TaskPhase2_Score ~ as.factor(Assignment_Group) + TaskPhase1_Score +
    as.factor(Gender) + as.factor(Education_Level) + as.factor(Age_Range)
)]
# Heterogeneity question: which pre-treatment features predict a stronger or
# weaker response? TODO: revisit — the arms are all treatment groups at
# different granularities, so a Treatment_Dummy x arm interaction (sketched
# below) may not be the right specification.
# mod5 <- d_respondents[, lm(TaskPhase2_Score ~ Treatment_Dummy + as.factor(assign_bin) +
#                            Treatment_Dummy * as.factor(assign_bin))]
stargazer(
  mod_task2_c, mod_task2_d,
  se        = list(get_robust_se(mod_task2_c), get_robust_se(mod_task2_d)),
  omit      = c("Education_Level", "Age_Range"),
  add.lines = list(
    c("Education Fixed Effects", "No", "Yes"),
    c("Age Fixed Effects", "No", "Yes")
  ),
  type = "text"
)
##
## =======================================================================================
## Dependent variable:
## -------------------------------------------
## TaskPhase2_Score
## (1) (2)
## ---------------------------------------------------------------------------------------
## as.factor(Assignment_Group)Medical Feedback 0.062** 0.055*
## (0.027) (0.029)
##
## as.factor(Assignment_Group)Negative Images 0.034 0.039
## (0.027) (0.027)
##
## as.factor(Assignment_Group)Positive Images 0.053* 0.050*
## (0.029) (0.027)
##
## as.factor(Assignment_Group)Self-Reflect 0.065** 0.058**
## (0.028) (0.029)
##
## TaskPhase1_Score 0.238***
## (0.048)
##
## as.factor(Gender)Male -0.010
## (0.017)
##
## Constant 0.461*** 0.282***
## (0.019) (0.073)
##
## ---------------------------------------------------------------------------------------
## Education Fixed Effects No Yes
## Age Fixed Effects No Yes
## Observations 350 350
## R2 0.021 0.119
## Adjusted R2 0.010 0.076
## Residual Std. Error 0.164 (df = 345) 0.158 (df = 333)
## F Statistic 1.874 (df = 4; 345) 2.805*** (df = 16; 333)
## =======================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Nested F test: covariates on top of the arm dummies. Output below: p ~ 0.0004.
anova(mod_task2_c, mod_task2_d, test='F')
## Analysis of Variance Table
##
## Model 1: TaskPhase2_Score ~ as.factor(Assignment_Group)
## Model 2: TaskPhase2_Score ~ as.factor(Assignment_Group) + TaskPhase1_Score +
## as.factor(Gender) + as.factor(Education_Level) + as.factor(Age_Range)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 345 9.24
## 2 333 8.32 12 0.921 3.07 0.00039 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Final task: does pooled treatment affect the phase-3 score?
# (1) treatment only; (2) plus baseline score, gender, education, age FEs.
mod_task3_a <- d_respondents[, lm(TaskPhase3_Score ~ Treatment_Dummy)]
mod_task3_b <- d_respondents[, lm(
  TaskPhase3_Score ~ Treatment_Dummy + TaskPhase1_Score +
    as.factor(Gender) + as.factor(Education_Level) + as.factor(Age_Range)
)]
stargazer(
  mod_task3_a, mod_task3_b,
  se        = list(get_robust_se(mod_task3_a), get_robust_se(mod_task3_b)),
  omit      = c("Education_Level", "Age_Range"),
  add.lines = list(
    c("Education Fixed Effects", "No", "Yes"),
    c("Age Fixed Effects", "No", "Yes")
  ),
  type = "text"
)
##
## ===================================================================
## Dependent variable:
## -------------------------------------------
## TaskPhase3_Score
## (1) (2)
## -------------------------------------------------------------------
## Treatment_Dummy 0.004 0.002
## (0.019) (0.019)
##
## TaskPhase1_Score 0.161***
## (0.047)
##
## as.factor(Gender)Male -0.004
## (0.017)
##
## Constant 0.538*** 0.515***
## (0.017) (0.064)
##
## -------------------------------------------------------------------
## Education Fixed Effects No Yes
## Age Fixed Effects No Yes
## Observations 350 350
## R2 0.0001 0.084
## Adjusted R2 -0.003 0.049
## Residual Std. Error 0.160 (df = 348) 0.155 (df = 336)
## F Statistic 0.034 (df = 1; 348) 2.384*** (df = 13; 336)
## ===================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Nested F test: covariates on top of treatment for phase 3. Output: p ~ 0.003.
anova(mod_task3_a, mod_task3_b, test='F')
## Analysis of Variance Table
##
## Model 1: TaskPhase3_Score ~ Treatment_Dummy
## Model 2: TaskPhase3_Score ~ Treatment_Dummy + TaskPhase1_Score + as.factor(Gender) +
## as.factor(Education_Level) + as.factor(Age_Range)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 348 8.86
## 2 336 8.12 12 0.748 2.58 0.0027 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Final task, by specific treatment arm.
mod_task3_c <- d_respondents[, lm(TaskPhase3_Score ~ as.factor(Assignment_Group))]
mod_task3_d <- d_respondents[, lm(
  TaskPhase3_Score ~ as.factor(Assignment_Group) + TaskPhase1_Score +
    as.factor(Gender) + as.factor(Education_Level) + as.factor(Age_Range)
)]
stargazer(
  mod_task3_c, mod_task3_d,
  se        = list(get_robust_se(mod_task3_c), get_robust_se(mod_task3_d)),
  omit      = c("Education_Level", "Age_Range"),
  add.lines = list(
    c("Education Fixed Effects", "No", "Yes"),
    c("Age Fixed Effects", "No", "Yes")
  ),
  type = "text"
)
##
## ======================================================================================
## Dependent variable:
## ------------------------------------------
## TaskPhase3_Score
## (1) (2)
## --------------------------------------------------------------------------------------
## as.factor(Assignment_Group)Medical Feedback 0.022 0.011
## (0.026) (0.026)
##
## as.factor(Assignment_Group)Negative Images -0.015 -0.011
## (0.027) (0.026)
##
## as.factor(Assignment_Group)Positive Images -0.001 0.004
## (0.025) (0.025)
##
## as.factor(Assignment_Group)Self-Reflect 0.010 0.005
## (0.026) (0.026)
##
## TaskPhase1_Score 0.157***
## (0.047)
##
## as.factor(Gender)Male -0.004
## (0.017)
##
## Constant 0.538*** 0.518***
## (0.017) (0.064)
##
## --------------------------------------------------------------------------------------
## Education Fixed Effects No Yes
## Age Fixed Effects No Yes
## Observations 350 350
## R2 0.006 0.087
## Adjusted R2 -0.005 0.043
## Residual Std. Error 0.160 (df = 345) 0.156 (df = 333)
## F Statistic 0.545 (df = 4; 345) 1.971** (df = 16; 333)
## ======================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Nested F test: covariates on top of arm dummies for phase 3. Output: p ~ 0.005.
anova(mod_task3_c, mod_task3_d, test='F')
## Analysis of Variance Table
##
## Model 1: TaskPhase3_Score ~ as.factor(Assignment_Group)
## Model 2: TaskPhase3_Score ~ as.factor(Assignment_Group) + TaskPhase1_Score +
## as.factor(Gender) + as.factor(Education_Level) + as.factor(Age_Range)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 345 8.81
## 2 333 8.10 12 0.711 2.44 0.0048 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Does phase-2 performance predict the phase-3 score, with and without
# treatment indicators and demographic fixed effects?
mod_task3_e <- d_respondents[ , lm(TaskPhase3_Score ~ TaskPhase2_Score)]
mod_task3_f <- d_respondents[ , lm(TaskPhase3_Score ~ TaskPhase2_Score + Treatment_Dummy)]
mod_task3_g <- d_respondents[ , lm(TaskPhase3_Score ~ TaskPhase2_Score + as.factor(Assignment_Group))]
mod_task3_h <- d_respondents[ , lm(TaskPhase3_Score ~ TaskPhase2_Score +
                                     as.factor(Assignment_Group) +
                                     as.factor(Gender) +
                                     as.factor(Education_Level) +
                                     as.factor(Age_Range))]
# BUG FIX: previously get_robust_se(mod_task3_g) sat OUTSIDE the se = list(...)
# (the list held e, f, h), so the columns were matched to the wrong SE vectors
# and the stray vector was passed to stargazer as an unnamed argument — which
# is why a raw SE vector was echoed after the table in the rendered output.
# All four models now get robust SEs, in model order.
stargazer(mod_task3_e,
          mod_task3_f,
          mod_task3_g,
          mod_task3_h,
          se = list(get_robust_se(mod_task3_e),
                    get_robust_se(mod_task3_f),
                    get_robust_se(mod_task3_g),
                    get_robust_se(mod_task3_h)),
          omit = c("Education_Level","Age_Range"),
          add.lines = list(c('Education Fixed Effects', 'No','No','No','Yes'),
                           c('Age Fixed Effects','No','No','No','Yes')),
          type='html')
| Dependent variable: | ||||
| TaskPhase3_Score | ||||
| (1) | (2) | (3) | (4) | |
| TaskPhase2_Score | 0.239*** | 0.242*** | 0.238*** | 0.241*** |
| (0.050) | (0.051) | (0.052) | (0.051) | |
| Treatment_Dummy | -0.009 | |||
| (0.019) | ||||
| as.factor(Assignment_Group)Medical Feedback | 0.008 | 0.001 | ||
| (0.027) | (0.027) | |||
| as.factor(Assignment_Group)Negative Images | -0.023 | -0.023 | ||
| (0.026) | (0.026) | |||
| as.factor(Assignment_Group)Positive Images | -0.013 | -0.007 | ||
| (0.025) | (0.026) | |||
| as.factor(Assignment_Group)Self-Reflect | -0.005 | -0.010 | ||
| (0.025) | (0.027) | |||
| as.factor(Gender)Male | -0.003 | |||
| (0.017) | ||||
| Constant | 0.420*** | 0.426*** | 0.428*** | 0.520*** |
| (0.026) | (0.028) | (0.062) | (0.062) | |
| Education Fixed Effects | No | No | No | Yes |
| Age Fixed Effects | No | No | No | Yes |
| Observations | 350 | 350 | 350 | 350 |
| R2 | 0.061 | 0.061 | 0.065 | 0.113 |
| Adjusted R2 | 0.058 | 0.056 | 0.052 | 0.070 |
| Residual Std. Error | 0.155 (df = 348) | 0.155 (df = 347) | 0.155 (df = 344) | 0.154 (df = 333) |
| F Statistic | 22.540*** (df = 1; 348) | 11.330*** (df = 2; 347) | 4.815*** (df = 5; 344) | 2.654*** (df = 16; 333) |
| Note: | *p<0.1; **p<0.05; ***p<0.01 | |||
| (Intercept) | TaskPhase2_Score | as.factor(Assignment_Group)Medical Feedback | as.factor(Assignment_Group)Negative Images | as.factor(Assignment_Group)Positive Images | as.factor(Assignment_Group)Self-Reflect |
| 0.028 | 0.051 | 0.026 | 0.026 | 0.025 | 0.024 |
# F test: treatment dummy adds nothing once phase-2 score is in. Output: p ~ 0.67.
anova(mod_task3_e, mod_task3_f, test='F')
Analysis of Variance Table
Model 1: TaskPhase3_Score ~ TaskPhase2_Score Model 2: TaskPhase3_Score ~ TaskPhase2_Score + Treatment_Dummy Res.Df RSS Df Sum of Sq F Pr(>F) 1 348 8.33
2 347 8.32 1 0.00436 0.18 0.67
# F test: demographics on top of phase-2 score + arm dummies. Output: p ~ 0.09.
anova(mod_task3_g, mod_task3_h, test='F')
Analysis of Variance Table
Model 1: TaskPhase3_Score ~ TaskPhase2_Score + as.factor(Assignment_Group) Model 2: TaskPhase3_Score ~ TaskPhase2_Score + as.factor(Assignment_Group) + as.factor(Gender) + as.factor(Education_Level) + as.factor(Age_Range) Res.Df RSS Df Sum of Sq F Pr(>F)
1 344 8.29
2 333 7.86 11 0.423 1.63 0.09 . — Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
##
# Add the phase-1 baseline alongside the phase-2 score as predictors of
# phase 3, then layer in treatment / arm / demographic controls.
mod_task3_i <- d_respondents[, lm(TaskPhase3_Score ~ TaskPhase1_Score + TaskPhase2_Score)]
mod_task3_j <- d_respondents[, lm(TaskPhase3_Score ~ TaskPhase1_Score + TaskPhase2_Score + Treatment_Dummy)]
mod_task3_k <- d_respondents[, lm(TaskPhase3_Score ~ TaskPhase1_Score + TaskPhase2_Score + as.factor(Assignment_Group))]
mod_task3_l <- d_respondents[, lm(
  TaskPhase3_Score ~ TaskPhase1_Score + TaskPhase2_Score +
    as.factor(Assignment_Group) + as.factor(Gender) +
    as.factor(Education_Level) + as.factor(Age_Range)
)]
# Robust SEs for all four specs; no `omit` here, so every coefficient prints.
stargazer(
  mod_task3_i, mod_task3_j, mod_task3_k, mod_task3_l,
  se = list(
    get_robust_se(mod_task3_i),
    get_robust_se(mod_task3_j),
    get_robust_se(mod_task3_k),
    get_robust_se(mod_task3_l)
  ),
  type = 'html'
)
| Dependent variable: | ||||
| TaskPhase3_Score | ||||
| (1) | (2) | (3) | (4) | |
| TaskPhase1_Score | 0.113** | 0.113** | 0.109** | 0.108** |
| (0.046) | (0.046) | (0.046) | (0.048) | |
| TaskPhase2_Score | 0.202*** | 0.204*** | 0.202*** | 0.208*** |
| (0.053) | (0.054) | (0.053) | (0.054) | |
| Treatment_Dummy | -0.007 | |||
| (0.019) | ||||
| as.factor(Assignment_Group)Medical Feedback | 0.007 | -0.001 | ||
| (0.026) | (0.026) | |||
| as.factor(Assignment_Group)Negative Images | -0.019 | -0.019 | ||
| (0.026) | (0.026) | |||
| as.factor(Assignment_Group)Positive Images | -0.012 | -0.007 | ||
| (0.024) | (0.025) | |||
| as.factor(Assignment_Group)Self-Reflect | -0.002 | -0.007 | ||
| (0.024) | (0.025) | |||
| as.factor(Gender)Male | -0.002 | |||
| (0.016) | ||||
| as.factor(Education_Level)Bachelor’s degree | -0.043 | |||
| (0.046) | ||||
| as.factor(Education_Level)High school | -0.058 | |||
| (0.054) | ||||
| as.factor(Education_Level)Master’s degree and above | -0.065 | |||
| (0.047) | ||||
| as.factor(Education_Level)Some high school | 0.133*** | |||
| (0.051) | ||||
| as.factor(Education_Level)Trade school | -0.131** | |||
| (0.062) | ||||
| as.factor(Age_Range)25-34 | -0.021 | |||
| (0.033) | ||||
| as.factor(Age_Range)35-44 | -0.062* | |||
| (0.036) | ||||
| as.factor(Age_Range)45-54 | -0.049 | |||
| (0.043) | ||||
| as.factor(Age_Range)55-64 | -0.056 | |||
| (0.036) | ||||
| as.factor(Age_Range)Above 65 | 0.201*** | |||
| (0.050) | ||||
| Constant | 0.370*** | 0.375*** | 0.378*** | 0.460*** |
| (0.032) | (0.034) | (0.034) | (0.067) | |
| Observations | 350 | 350 | 350 | 350 |
| R2 | 0.077 | 0.078 | 0.081 | 0.127 |
| Adjusted R2 | 0.072 | 0.070 | 0.064 | 0.082 |
| Residual Std. Error | 0.154 (df = 347) | 0.154 (df = 346) | 0.154 (df = 343) | 0.153 (df = 332) |
| F Statistic | 14.520*** (df = 2; 347) | 9.693*** (df = 3; 346) | 5.010*** (df = 6; 343) | 2.844*** (df = 17; 332) |
| Note: | *p<0.1; **p<0.05; ***p<0.01 | |||
# F test: treatment dummy on top of both score baselines. Output: p ~ 0.74.
anova(mod_task3_i, mod_task3_j, test = 'F')
Analysis of Variance Table
Model 1: TaskPhase3_Score ~ TaskPhase1_Score + TaskPhase2_Score Model 2: TaskPhase3_Score ~ TaskPhase1_Score + TaskPhase2_Score + Treatment_Dummy Res.Df RSS Df Sum of Sq F Pr(>F) 1 347 8.18
2 346 8.18 1 0.00253 0.11 0.74
# F test: demographics on top of scores + arm dummies. Output: p ~ 0.094.
anova(mod_task3_k, mod_task3_l, test = 'F')
Analysis of Variance Table
Model 1: TaskPhase3_Score ~ TaskPhase1_Score + TaskPhase2_Score + as.factor(Assignment_Group) Model 2: TaskPhase3_Score ~ TaskPhase1_Score + TaskPhase2_Score + as.factor(Assignment_Group) + as.factor(Gender) + as.factor(Education_Level) + as.factor(Age_Range) Res.Df RSS Df Sum of Sq F Pr(>F)
1 343 8.15
2 332 7.74 11 0.413 1.61 0.094 . — Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# lm(TaskPhase3_Score ~ TaskPhase2_Score) vs lm(TaskPhase3_Score ~ TaskPhase1_Score + TaskPhase2_Score)
# Adding the phase-1 baseline significantly improves fit. Output: p ~ 0.013.
anova(mod_task3_e, mod_task3_i, test = 'F')
Analysis of Variance Table
Model 1: TaskPhase3_Score ~ TaskPhase2_Score Model 2: TaskPhase3_Score ~ TaskPhase1_Score + TaskPhase2_Score Res.Df RSS Df Sum of Sq F Pr(>F)
1 348 8.33
2 347 8.18 1 0.145 6.17 0.013 * — Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Compare the Self-Reflect and Medical Feedback arms on top of the pooled
# treatment indicator, via per-arm 0/1 dummies (built in one := assignment).
d_respondents[, `:=`(
  Self_Reflect_Dummy = ifelse(Assignment_Group == "Self-Reflect", 1, 0),
  Med_Feedback_Dummy = ifelse(Assignment_Group == "Medical Feedback", 1, 0)
)]
mod_test_dummies1 <- d_respondents[, lm(TaskPhase2_Score ~ Treatment_Dummy + Self_Reflect_Dummy)]
mod_test_dummies2 <- d_respondents[, lm(TaskPhase2_Score ~ Treatment_Dummy + Med_Feedback_Dummy)]
stargazer(
  mod_test_dummies1, mod_test_dummies2,
  se = list(
    get_robust_se(mod_test_dummies1),
    get_robust_se(mod_test_dummies2)
  ),
  type = 'text'
)
##
## ===========================================================
## Dependent variable:
## ----------------------------
## TaskPhase2_Score
## (1) (2)
## -----------------------------------------------------------
## Treatment_Dummy 0.050** 0.051**
## (0.022) (0.023)
##
## Self_Reflect_Dummy 0.016
## (0.023)
##
## Med_Feedback_Dummy 0.011
## (0.022)
##
## Constant 0.461*** 0.461***
## (0.019) (0.019)
##
## -----------------------------------------------------------
## Observations 350 350
## R2 0.018 0.017
## Adjusted R2 0.012 0.012
## Residual Std. Error (df = 347) 0.163 0.164
## F Statistic (df = 2; 347) 3.192** 3.079**
## ===========================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
### linear model playground
# Work on a trimmed copy holding only the analysis columns.
d_test <- d_respondents[, c(
  "Assignment_Group", "TaskPhase1_Score", "TaskPhase2_Score",
  "TaskPhase3_Score", "Age_Range", "Education_Level", "Gender",
  "Treatment_Dummy"
)]
# Does treatment affect the phase-2 score, controlling for baseline?
mod_test1 <- d_test[, lm(TaskPhase2_Score ~ TaskPhase1_Score + Treatment_Dummy)]
# Does the treatment effect vary with the baseline score (interaction)?
mod_test2 <- d_test[, lm(
  TaskPhase2_Score ~ TaskPhase1_Score + Treatment_Dummy +
    (TaskPhase1_Score * Treatment_Dummy)
)]
# NOTE(review): original comment said adding TaskPhase1 makes "the RSEs
# disappear" — meaning unclear (robust SEs still print below); verify intent.
mod_test3 <- d_test[, lm(
  TaskPhase2_Score ~ Treatment_Dummy + TaskPhase1_Score +
    as.factor(Education_Level) + as.factor(Gender) + as.factor(Age_Range)
)]
coeftest(mod_test3, vcov = vcovHC(mod_test3, "HC1"))
##
## t test of coefficients:
##
## Estimate Std. Error t value
## (Intercept) 0.28108 0.07213 3.90
## Treatment_Dummy 0.05071 0.02217 2.29
## TaskPhase1_Score 0.24027 0.04682 5.13
## as.factor(Education_Level)Bachelor's degree -0.00683 0.04856 -0.14
## as.factor(Education_Level)High school 0.04068 0.05619 0.72
## as.factor(Education_Level)Master's degree and above -0.01698 0.05128 -0.33
## as.factor(Education_Level)Some high school -0.12065 0.05108 -2.36
## as.factor(Education_Level)Trade school 0.02867 0.06926 0.41
## as.factor(Gender)Male -0.00995 0.01735 -0.57
## as.factor(Age_Range)25-34 0.04469 0.03768 1.19
## as.factor(Age_Range)35-44 0.04198 0.03952 1.06
## as.factor(Age_Range)45-54 0.06975 0.04178 1.67
## as.factor(Age_Range)55-64 0.08035 0.04252 1.89
## as.factor(Age_Range)Above 65 0.12575 0.05172 2.43
## Pr(>|t|)
## (Intercept) 0.00012 ***
## Treatment_Dummy 0.02281 *
## TaskPhase1_Score 4.9e-07 ***
## as.factor(Education_Level)Bachelor's degree 0.88827
## as.factor(Education_Level)High school 0.46957
## as.factor(Education_Level)Master's degree and above 0.74080
## as.factor(Education_Level)Some high school 0.01874 *
## as.factor(Education_Level)Trade school 0.67922
## as.factor(Gender)Male 0.56658
## as.factor(Age_Range)25-34 0.23642
## as.factor(Age_Range)35-44 0.28890
## as.factor(Age_Range)45-54 0.09593 .
## as.factor(Age_Range)55-64 0.05968 .
## as.factor(Age_Range)Above 65 0.01556 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Distribution of the phase-1 baseline score (output below: range 0.2-1.0,
# median 0.6, mean ~0.606).
summary(d_respondents$TaskPhase1_Score)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.200 0.500 0.600 0.606 0.700 1.000
# Side-by-side table of the three playground specifications, robust SEs each.
stargazer(
  mod_test1, mod_test2, mod_test3,
  se = list(
    get_robust_se(mod_test1),
    get_robust_se(mod_test2),
    get_robust_se(mod_test3)
  ),
  type = 'text'
)
##
## ===========================================================================================================================
## Dependent variable:
## -----------------------------------------------------------------------
## TaskPhase2_Score
## (1) (2) (3)
## ---------------------------------------------------------------------------------------------------------------------------
## TaskPhase1_Score 0.249*** 0.153 0.240***
## (0.044) (0.095) (0.047)
##
## as.factor(Education_Level)Bachelor's degree -0.007
## (0.049)
##
## as.factor(Education_Level)High school 0.041
## (0.056)
##
## as.factor(Education_Level)Master's degree and above -0.017
## (0.051)
##
## as.factor(Education_Level)Some high school -0.121**
## (0.051)
##
## as.factor(Education_Level)Trade school 0.029
## (0.069)
##
## as.factor(Gender)Male -0.010
## (0.017)
##
## as.factor(Age_Range)25-34 0.045
## (0.038)
##
## as.factor(Age_Range)35-44 0.042
## (0.040)
##
## as.factor(Age_Range)45-54 0.070*
## (0.042)
##
## as.factor(Age_Range)55-64 0.080*
## (0.043)
##
## as.factor(Age_Range)Above 65 0.126**
## (0.052)
##
## Treatment_Dummy 0.054** -0.019 0.051**
## (0.021) (0.065) (0.022)
##
## TaskPhase1_Score:Treatment_Dummy 0.120
## (0.107)
##
## Constant 0.310*** 0.368*** 0.281***
## (0.032) (0.057) (0.072)
##
## ---------------------------------------------------------------------------------------------------------------------------
## Observations 350 350 350
## R2 0.098 0.101 0.117
## Adjusted R2 0.092 0.093 0.083
## Residual Std. Error 0.157 (df = 347) 0.157 (df = 346) 0.158 (df = 336)
## F Statistic 18.780*** (df = 2; 347) 12.920*** (df = 3; 346) 3.433*** (df = 13; 336)
## ===========================================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Bivariate phase-2 -> phase-3 model. Note: coeftest here uses the default
# (classical, non-robust) covariance — unlike the robust-SE calls above.
mod_test4 <- d_test[ , lm(TaskPhase3_Score ~ TaskPhase2_Score)]
coeftest(mod_test4)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.4205 0.0267 15.77 <2e-16 ***
## TaskPhase2_Score 0.2389 0.0503 4.75 3e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# use Robust SE
# NOTE(review): this reassigns `mod_test2`, clobbering the interaction model
# used in the stargazer table earlier — rename one of them if both are needed.
mod_test2 <- d_respondents[, lm(TaskPhase2_Score ~ Treatment_Dummy + as.factor(Education_Level) + (Treatment_Dummy * as.factor(Education_Level)))]
# Stash the HC covariance matrix on the model object for reuse below.
mod_test2$vcovHC_ <- vcovHC(mod_test2)
# The printed SEs are all NA — presumably because the fit is rank-deficient
# (the "Some high school" x treatment interaction has no coefficient in the
# output, i.e. an aliased/empty cell), which breaks the HC estimator.
# TODO confirm, and consider dropping or merging the offending level.
coeftest(mod_test2, vcov = mod_test2$vcovHC_)
##
## t test of coefficients:
##
## Estimate
## (Intercept) 0.53333
## Treatment_Dummy 0.00667
## as.factor(Education_Level)Bachelor's degree -0.07424
## as.factor(Education_Level)High school -0.03333
## as.factor(Education_Level)Master's degree and above -0.07333
## as.factor(Education_Level)Some high school -0.14000
## as.factor(Education_Level)Trade school -0.23333
## Treatment_Dummy:as.factor(Education_Level)Bachelor's degree 0.04142
## Treatment_Dummy:as.factor(Education_Level)High school 0.07515
## Treatment_Dummy:as.factor(Education_Level)Master's degree and above 0.04386
## Treatment_Dummy:as.factor(Education_Level)Trade school 0.30762
## Std. Error
## (Intercept) NA
## Treatment_Dummy NA
## as.factor(Education_Level)Bachelor's degree NA
## as.factor(Education_Level)High school NA
## as.factor(Education_Level)Master's degree and above NA
## as.factor(Education_Level)Some high school NA
## as.factor(Education_Level)Trade school NA
## Treatment_Dummy:as.factor(Education_Level)Bachelor's degree NA
## Treatment_Dummy:as.factor(Education_Level)High school NA
## Treatment_Dummy:as.factor(Education_Level)Master's degree and above NA
## Treatment_Dummy:as.factor(Education_Level)Trade school NA
## t value
## (Intercept) NA
## Treatment_Dummy NA
## as.factor(Education_Level)Bachelor's degree NA
## as.factor(Education_Level)High school NA
## as.factor(Education_Level)Master's degree and above NA
## as.factor(Education_Level)Some high school NA
## as.factor(Education_Level)Trade school NA
## Treatment_Dummy:as.factor(Education_Level)Bachelor's degree NA
## Treatment_Dummy:as.factor(Education_Level)High school NA
## Treatment_Dummy:as.factor(Education_Level)Master's degree and above NA
## Treatment_Dummy:as.factor(Education_Level)Trade school NA
## Pr(>|t|)
## (Intercept) NA
## Treatment_Dummy NA
## as.factor(Education_Level)Bachelor's degree NA
## as.factor(Education_Level)High school NA
## as.factor(Education_Level)Master's degree and above NA
## as.factor(Education_Level)Some high school NA
## as.factor(Education_Level)Trade school NA
## Treatment_Dummy:as.factor(Education_Level)Bachelor's degree NA
## Treatment_Dummy:as.factor(Education_Level)High school NA
## Treatment_Dummy:as.factor(Education_Level)Master's degree and above NA
## Treatment_Dummy:as.factor(Education_Level)Trade school NA